This is an R Markdown document containing code and plots for Abumazen et al. “Nasopharyngeal metatranscriptomics reveals host-pathogen signatures of pediatric sinusitis”.
library(reshape2)
library(pheatmap)
library(pROC)
library(RColorBrewer)
library(dplyr)
library(plyr)
library(vioplot)
library(Metrics)
library(DESeq2)
library(tximport)
library(EnhancedVolcano)
library(patchwork)
library(knitr)
Reading in the kraken2 data
tb = read.delim("d1-d5_fastp_dehost_krakenmerged_reads_normalized.txt",sep='\t',col.names=(c("X","Y","Z")))
matr = dcast(tb,Y ~ X,value.var = "Z")
rownames(matr) = matr[,1]
matr = matr[,-1]
for (i in 1:nrow(matr))
{ matr[i,which(is.na(matr[i,]))] = 0
}
matr = t(data.matrix(matr))
splitrownames = vector(length = nrow(matr))
for (i in 1:length(splitrownames))
{
rownames(matr)[i] = strsplit(rownames(matr),"_")[[i]][1]
}
Reading in the metadata
metadata = read.csv("metadata.12.20.updated.csv",header=T)
metadata = metadata[which(metadata$batch != 1),] #removing samples from batch 1
shared = intersect(rownames(matr),metadata[,1])
matr = matr[match(shared,rownames(matr)),]
metadata = metadata[match(shared,metadata[,1]),]
annotation_row = metadata[,c(22,24,26)]
annotation_row = (annotation_row -2) * -1 #just to make it going 0 to 1
rownames(annotation_row) = metadata[,1]
colnames(annotation_row) = c("SPN", "HFLU", "MCAT")
cols = c("white",colorRampPalette((brewer.pal(n = 7, name =
"YlGnBu")))(99))
ann_colors = list(
SPN = c("white", "light gray"),
HFLU = c("white", "light gray"),
MCAT = c("white", "light gray")
)
pheatmap(log(matr[,c(grep("Moraxella cat", colnames(matr)),grep("Streptococcus pne", colnames(matr)),grep("Haemophilus inf", colnames(matr)))] + 1, 10),annotation_row=annotation_row[,c(2,1,3)],cluster_cols = F, show_rownames = FALSE,col=cols,annotation_colors = ann_colors)
group = metadata[,"mcat"]
group <- mapvalues(group,
from=c("2","1"),
to=c("-","+"))
group = as.factor(group)
abund = log(matr[,"Moraxella catarrhalis"] + 1,10)
p = ggplot(data.frame(mcat = abund), aes(y = abund, x = group,fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
theme(legend.position = "none") +
ggtitle("mcat") + theme(plot.title = element_text(hjust = 0.5)) +
labs(y = "RNA-seq abundance\nlog10(rpm)", x = "Culture test") +
geom_point(pch = 21, position = position_jitterdodge())
p
group = metadata[,"spn"]
group <- mapvalues(group,
from=c("2","1"),
to=c("-","+"))
group = as.factor(group)
abund = log(matr[,"Streptococcus pneumoniae"] + 1,10)
p = ggplot(data.frame(spn = abund), aes(y = abund, x = group,fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
theme(legend.position = "none") +
ggtitle("spn") + theme(plot.title = element_text(hjust = 0.5)) +
labs(y = "RNA-seq abundance\nlog10(rpm)", x = "Culture test") +
geom_point(pch = 21, position = position_jitterdodge())
p
group = metadata[,"hflu"]
group <- mapvalues(group,
from=c("2","1"),
to=c("-","+"))
group = as.factor(group)
abund = log(matr[,"Haemophilus influenzae"] + 1,10)
p = ggplot(data.frame(hflu = abund), aes(y = abund, x = group,fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
theme(legend.position = "none") +
ggtitle("hflu") + theme(plot.title = element_text(hjust = 0.5)) +
labs(y = "RNA-seq abundance\nlog10(rpm)", x = "Culture test") +
geom_point(pch = 21, position = position_jitterdodge())
p
bacterial_detection_threshold = 3 # gives a good balance of sensitivity and specificity
# number of HFLU detected
hflu = which(matr[,"Haemophilus influenzae"] >= bacterial_detection_threshold)
length(hflu)
## [1] 81
# number of SPN detected
spn = which(matr[,"Streptococcus pneumoniae"] >= bacterial_detection_threshold)
length(spn)
## [1] 73
# number of MCAT detected
mcat = which(matr[,"Moraxella catarrhalis"] >= bacterial_detection_threshold)
length(mcat)
## [1] 137
#any of the above three
length(unique(c(hflu,spn,mcat)))
## [1] 177
#two or more
length(unique(c(intersect(hflu,mcat),intersect(hflu,spn),intersect(spn,mcat))))
## [1] 89
#all three
length(intersect(intersect(hflu,mcat),spn))
## [1] 25
values = unique(rev(sort(matr[,"Moraxella catarrhalis"])))
TPR = vector(length = length(values))
FPR = vector(length = length(values))
for (i in 1:length(values))
{ k = values[i]
matches = which(matr[,"Moraxella catarrhalis"] >= k)
TPR[i] = length(intersect(matches,which(metadata[,"mcat"] == 1))) / length(which(metadata[,"mcat"] == 1))
FPR[i] = length(intersect(matches,which(metadata[,"mcat"] == 2))) / length(which(metadata[,"mcat"] == 2))
}
prediction = matr[,"Moraxella catarrhalis"]
response = metadata[,"mcat"]
response = (response -2) * -1
auc(response,prediction)
## [1] 0.823844
auc_mcat = auc(response,prediction)
plot(FPR,TPR,type="l",main=paste("mcat","AUC = ",round(auc_mcat,2)))
matches = which(matr[,"Moraxella catarrhalis"] >= bacterial_detection_threshold)
tpr0 = length(intersect(matches,which(metadata[,"mcat"] == 1))) / length(which(metadata[,"mcat"] == 1))
fpr0 = length(intersect(matches,which(metadata[,"mcat"] == 2))) / length(which(metadata[,"mcat"] == 2))
points(x = fpr0, y = tpr0,cex=2)
# prints out the sensitivity and the specificity given the threshold defined above
print(c("mcat",tpr0 * 100,100-(fpr0 * 100)))
## [1] "mcat" "84.7457627118644" "64.0776699029126"
sens_mcat = tpr0 * 100
spec_mcat = 100-(fpr0 * 100)
values = unique(rev(sort(matr[,"Streptococcus pneumoniae"])))
TPR = vector(length = length(values))
FPR = vector(length = length(values))
for (i in 1:length(values))
{ k = values[i]
matches = which(matr[,"Streptococcus pneumoniae"] >= k)
TPR[i] = length(intersect(matches,which(metadata[,"spn"] == 1))) / length(which(metadata[,"spn"] == 1))
FPR[i] = length(intersect(matches,which(metadata[,"spn"] == 2))) / length(which(metadata[,"spn"] == 2))
}
prediction = matr[,"Streptococcus pneumoniae"]
response = metadata[,"spn"]
response = (response -2) * -1
auc(response,prediction)
## [1] 0.8915326
auc_spn = auc(response,prediction)
plot(FPR,TPR,type="l",main=paste("spn","AUC = ",round(auc_spn,2)))
matches = which(matr[,"Streptococcus pneumoniae"] >= bacterial_detection_threshold)
tpr0 = length(intersect(matches,which(metadata[,"spn"] == 1))) / length(which(metadata[,"spn"] == 1))
fpr0 = length(intersect(matches,which(metadata[,"spn"] == 2))) / length(which(metadata[,"spn"] == 2))
points(x = fpr0, y = tpr0,cex=2)
# prints out the sensitivity and the specificity given the threshold defined above
print(c("spn",tpr0 * 100,100-(fpr0 * 100)))
## [1] "spn" "81.4285714285714" "89.4039735099338"
sens_spn = tpr0 * 100
spec_spn = 100-(fpr0 * 100)
values = unique(rev(sort(matr[,"Haemophilus influenzae"])))
TPR = vector(length = length(values))
FPR = vector(length = length(values))
for (i in 1:length(values))
{ k = values[i]
matches = which(matr[,"Haemophilus influenzae"] >= k)
TPR[i] = length(intersect(matches,which(metadata[,"hflu"] == 1))) / length(which(metadata[,"hflu"] == 1))
FPR[i] = length(intersect(matches,which(metadata[,"hflu"] == 2))) / length(which(metadata[,"hflu"] == 2))
}
prediction = matr[,"Haemophilus influenzae"]
response = metadata[,"hflu"]
response = (response -2) * -1
auc(response,prediction)
## [1] 0.9549669
auc_hflu = auc(response,prediction)
plot(FPR,TPR,type="l",main=paste("hflu","AUC = ",round(auc_hflu,2)))
matches = which(matr[,"Haemophilus influenzae"] > bacterial_detection_threshold)
tpr0 = length(intersect(matches,which(metadata[,"hflu"] == 1))) / length(which(metadata[,"hflu"] == 1))
fpr0 = length(intersect(matches,which(metadata[,"hflu"] == 2))) / length(which(metadata[,"hflu"] == 2))
points(x = fpr0, y = tpr0,cex=2)
# prints out the sensitivity and the specificity given the threshold defined above
print(c("hflu",tpr0 * 100,100-(fpr0 * 100)))
## [1] "hflu" "94.2857142857143" "90.0662251655629"
sens_hflu = tpr0 * 100
spec_hflu = 100-(fpr0 * 100)
accuracies = cbind(sens = c(mcat = sens_mcat,spn = sens_spn,hflu = sens_hflu),spec = c(mcat = spec_mcat,spec_spn,spec_hflu),auc = c(auc_mcat, auc_spn, auc_hflu))
kable(accuracies)
| sens | spec | auc | |
|---|---|---|---|
| mcat | 84.74576 | 64.07767 | 0.8238440 |
| spn | 81.42857 | 89.40397 | 0.8915326 |
| hflu | 94.28571 | 90.06623 | 0.9549669 |
To explore, we will first subset the data to all viruses (species with “virus” in their name) with greater than log10(rpm) +1 > 0.3 (roughly 1 read per million), as well as any species with log10+1 abundance greater than 3 since this threshold includes our three bacterial sinusitis pathogens (SPN, HFU, MCAT).
matr.subset = matr[,unique(c(which(apply(log(matr + 1,10),2,max) > 3),intersect(grep("virus",colnames(matr)),which(apply(log(matr + 1,10),2,max) > 0.3))))]
pathTable = rev(sort(apply(log(matr.subset + 1,10),2,max)))
cols = vector(length = length(pathTable))
cols[] = "red"
cols[which(names(pathTable) %in% c(three_bacteria,panel_viruses))] = "black"
par(mfrow=c(2,1))
barplot(pathTable,col=cols,las=3)
pheatmap(t(log(matr[,names(pathTable)] + 1,10)),cluster_rows=F,fontsize_row=5)
The above list of species was then investigated further using BLAST and literature searches to identify a smaller subset of pathogens of interest.
pathogensOfInterest = c("Influenza A virus", "Influenza C virus", "Haemophilus influenzae", "Human coronavirus NL63", "Influenza B virus", "Betacoronavirus 1", "Respiratory syncytial virus", "Human coronavirus HKU1", "Human respirovirus 3", "Human orthopneumovirus", "Moraxella nonliquefaciens", "Fusobacterium nucleatum", "Human mastadenovirus C", "Human metapneumovirus", "Moraxella catarrhalis", "Prevotella intermedia", "Metamycoplasma salivarium", "Streptococcus pneumoniae", "Pasteurella multocida", "Human orthorubulavirus 4", "Tannerella forsythia", "Corynebacterium pseudodiphtheriticum", "Moraxella osloensis", "Enterovirus B", "Mycoplasma pneumoniae", "Enterovirus A", "Human orthorubulavirus 2", "Chlamydia pneumoniae", "Treponema medium", "Human coronavirus 229E", "Rhinovirus C", "Enterovirus D", "Rhinovirus A", "Human respirovirus 1", "Murine leukemia virus", "Rhinovirus B", "Human mastadenovirus B", "Human gammaherpesvirus 4", "Human betaherpesvirus 5", "Mamastrovirus 9", "Cardiovirus B", "Betapolyomavirus quartihominis", "Parechovirus A")
matr.subset = matr[,pathogensOfInterest]
pathTable = rev(sort(apply(log(matr.subset + 1,10),2,max)))
cols = vector(length = length(pathTable))
cols[] = "red"
cols[which(names(pathTable) %in% c(three_bacteria,panel_viruses))] = "black"
par(mfrow=c(2,1))
barplot(pathTable,col=cols,las=3,cex.names=0.5)
#plot the pathogen-positive samples (basedon culture/PCR)
breaksList = seq(0, 5, by = 1)
cols = c("white",colorRampPalette(brewer.pal(n = 4, name="YlGnBu"))(4))
temp.matr = t(log(matr[which(apply(metadata[,c(27,64)],1,sum) != 0),names(pathTable)] + 1,10))
temp.matr[temp.matr > 5] = 5 # adds ceiling to data to make color range consistent with next plot
pheatmap(temp.matr,cluster_rows=F,fontsize_row=5,fontsize_col=3,zlim=c(0,5),color =rev(cols), breaks=breaksList)
#plot the pathogen-negative samples (basedon culture/PCR)
pheatmap(t(log(matr[which(apply(metadata[,c(27,64)],1,sum) == 0),names(pathTable)] + 1,10)),cluster_rows=F,fontsize_row=5,fontsize_col=3,zlim=c(0,5),color = rev(cols),breaks=breaksList)
genesymbols = read.delim("host-expression/gencode.v39.metadata.HGNC")
metadata = metadata[which(metadata$batch != 1),] #removing samples from batch 1
metadata[,"batch"] = as.factor(metadata[,"batch"])
metadata[,"bv_both_none"] = as.factor(metadata[,"bv_both_none"])
metadata[metadata[,"cum_pathogn"] > 1,"cum_pathogn"] = 1
metadata[,"cum_pathogn"] = as.factor(metadata[,"cum_pathogn"])
metadata[,"sex"] = as.factor(metadata[,"sex"])
metadata$age_scaled = scale(metadata$Age.in.years)
onlyvirusonlybacteria_samples = metadata[which((metadata$bv_both_none == 1) | (metadata$bv_both_none == 2)),]
onlyvirusonlybacteria_samples = onlyvirusonlybacteria_samples %>% arrange(cum_pathogn)
subsetfiles <- file.path("host-expression/quants", onlyvirusonlybacteria_samples$Filename, "quant.sf")
subset_txi.salmon <- tximport(subsetfiles, type = "salmon", tx2gene = genesymbols)
dds <- DESeqDataSetFromTximport(subset_txi.salmon, onlyvirusonlybacteria_samples, ~ batch + sex + age_scaled + cum_pathogn)
dds <- DESeq(dds)
res <- results(dds)
summary(res)
##
## out of 31126 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 2944, 9.5%
## LFC < 0 (down) : 1891, 6.1%
## outliers [1] : 0, 0%
## low counts [2] : 8333, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
pairwise_genes = which(res$padj < 0.001)
bac_upDEGs = intersect(which(res$padj < 0.001),which(res$log2FoldChange > 0))
viral_upDEGs = intersect(which(res$padj < 0.001),which(res$log2FoldChange < 0))
There are 548 bacterial upDEGs and 273 viral upDEGs.
EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 0.001,
FCcutoff = 0,
xlim = c(-7, 7),
ylim=c(-1,27),
pointSize = 1.5,
labSize = 2.5,
title = 'DESeq2 results',
subtitle = 'Differential expression',
caption = 'p-value cutoff, 0.001',
legendPosition = "none",
legendLabSize = 14,
col = c('grey30', 'light gray', 'royalblue', 'red2'),
colAlpha = 0.9,
drawConnectors = FALSE,
widthConnectors = 0.5)
Read in full set of files, including those with NO pathogens and BOTH (virus and bacteria)
files <- file.path("host-expression/quants", metadata$Filename, "quant.sf")
txi.salmon <- tximport(files, type = "salmon", tx2gene = genesymbols)
pdds <- DESeqDataSetFromTximport(txi.salmon, metadata, ~ batch + sex + age_scaled + cum_pathogn)
prld <- varianceStabilizingTransformation(pdds, blind = TRUE)
These are plots of host gene expression per clinical group
pathOrNot = as.numeric(metadata[,"bv_both_none"]) # using clinical categories
genelist = c("CXCL10", "PRF1", "IFI27", "CCL8", "PTGS2", "S100A9", "PLAUR", "TNFAIP3","IL1B")
for (i in 1:length(genelist)) {
gene = genelist[i]
print(gene)
p = ggplot(data.frame(cbind(assay(prld)[gene,],pathOrNot)), aes(y = V1, x = as.factor(pathOrNot),fill=as.factor(pathOrNot))) +
geom_boxplot(outlier.shape=NA) +
ggtitle(gene) +
theme(axis.title = element_blank(), legend.position="none") +
labs(y = "RNA-seq abundance\nlog10(rpm)", x = "Culture test") +
geom_point(pch = 21, position = position_jitterdodge()) +
scale_fill_discrete(labels=c("None detected", "Bacterial", "Viral", "Both")) +
labs(fill = "Infection type")
assign(paste0("p", i), p)
}
## [1] "CXCL10"
## [1] "PRF1"
## [1] "IFI27"
## [1] "CCL8"
## [1] "PTGS2"
## [1] "S100A9"
## [1] "PLAUR"
## [1] "TNFAIP3"
## [1] "IL1B"
p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + theme(legend.position="right") + plot_layout(nrow = 3)
Setting up data for sorted heatmaps
#setting up a temp matrix with Z-score scaled expression levels
temp = t(scale(t(assay(prld[,]))))
temp[temp < -4] = -4
temp[temp > +4] = +4
temp[is.na(temp)] = 0
annotation_col = data.frame(
Category = metadata$high_pathogens,
Viral_pathogen_abundance = metadata[,"viral_quantile"],
Bacterial_pathogen_abundance = metadata[,"threebac_quantile"],
Symptom_severity_score = metadata[,"PRSS"],
Days_with_cold = metadata[,"days_with_cold"],
Infection = factor(metadata[,"bv_both_none"], labels = c("None","Viral", "Bacterial", "Both"))
)
heatmap <- pheatmap(temp[bac_upDEGs,order(bacPathAbundance)],
annotation_colors = list(Infection = c(None = "white", Bacterial = "light blue", Viral = "gold", Both = "black"),
Resistance = c("No" = "green", "Yes" = "red")),
annotation_col = annotation_col,
show_colnames = F,
show_rownames = F,
border_color=NA,
col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(55),
#col = colorRampPalette(c("navy", "white", "red"))(50),
fontsize_row=6,
cluster_cols = F,
cluster_rows = T,
annotation_legend = TRUE)
heatmap1 <- pheatmap(temp[viral_upDEGs,order(metadata$high_pathogen)],
annotation_colors = list(Infection = c(None = "white", Bacterial = "light blue", Viral = "gold", Both = "black"),
Resistance = c("No" = "white", "Yes" = "black")),
annotation_col = annotation_col,
show_colnames = F,
show_rownames = F,
border_color=NA,
col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(55),
#col = colorRampPalette(c("navy", "white", "red"))(50),
fontsize_row=6,
cluster_cols = F,
cluster_rows = T,
annotation_legend = TRUE)
heatmap2 <- pheatmap(temp[bac_upDEGs,order(metadata$high_pathogen)],
annotation_colors = list(Infection = c(None = "white", Bacterial = "light blue", Viral = "gold", Both = "black"),
Resistance = c("No" = "white", "Yes" = "black")),
annotation_col = annotation_col,
show_colnames = F,
show_rownames = F,
border_color=NA,
col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(55),
#col = colorRampPalette(c("navy", "white", "red"))(50),
fontsize_row=6,
cluster_cols = F,
cluster_rows = T,
annotation_legend = TRUE)
temp = t(scale(t(assay(prld[,]))))
viralResponseScore = apply(temp[viral_upDEGs,],2,mean)
bacterialResponseScore = apply(temp[bac_upDEGs,],2,mean)
group = as.numeric(metadata[,"threebac_quantile"])
p = ggplot(data.frame(cbind(bacterialResponseScore,group, row.names=NULL)), aes(y = bacterialResponseScore, x = as.factor(group),fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
geom_point(pch = 21, position = position_jitterdodge()) +
labs(fill = "Pathogen abundance") +
labs(x = "Bacterial pathogen abundance (decile)", y = "Bacterial response score\n(Mean expression Z-score of bacterial upDEGs)") +
theme(legend.position = "none")
p
group = as.numeric(metadata[,"viral_quantile"])
p = ggplot(data.frame(cbind(viralResponseScore,group, row.names=NULL)), aes(y = viralResponseScore, x = as.factor(group),fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
geom_point(pch = 21, position = position_jitterdodge()) +
labs(fill = "Pathogen abundance") +
labs(x = "Viral pathogen abundance (decile)", y = "Viral response score\n(Mean expression Z-score of viral upDEGs)") +
theme(legend.position = "none")
p
group = as.numeric(metadata[,"high_pathogens"])
p = ggplot(data.frame(cbind(bacterialResponseScore,group, row.names=NULL)), aes(y = bacterialResponseScore, x = as.factor(group),fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
geom_point(pch = 21, position = position_jitterdodge()) +
scale_fill_discrete(labels=c("Both low", "Viral high", "Bacterial high", "Both high")) +
labs(fill = "Pathogen abundance") +
labs(x = "Pathogen abundance (decile)", y = "Bacterial response score\n(Mean expression Z-score of bacterial upDEGs)")
p
group = as.numeric(metadata[,"high_pathogens"])
p = ggplot(data.frame(cbind(viralResponseScore,group, row.names=NULL)), aes(y = viralResponseScore, x = as.factor(group),fill=as.factor(group))) +
geom_boxplot(outlier.shape=NA) +
geom_point(pch = 21, position = position_jitterdodge()) +
scale_fill_discrete(labels=c("Both low", "Viral high", "Bacterial high", "Both high")) +
labs(fill = "Pathogen abundance") +
labs(x = "Pathogen abundance (decile)", y = "Viral response score\n(Mean expression Z-score of viral upDEGs)")
p
pathOrNot = as.numeric(metadata[,"high_pathogens"]) # using categories "Both low" "Viral high" "Bacterial high" "Both high"
genelist = c("PTGS2", "S100A9", "PLAUR", "TNFAIP3", "SERPINA2", "CXCL2","IL1B", "CXCL11","IFNL1","CXCL10", "PRF1", "IFI27", "CCL8","OAS2")
for (i in 1:length(genelist)) {
gene = genelist[i]
p = ggplot(data.frame(cbind(assay(prld)[gene,],pathOrNot)), aes(y = V1, x = as.factor(pathOrNot),fill=as.factor(pathOrNot))) +
geom_boxplot(outlier.shape=NA) +
ggtitle(gene) +
theme(axis.title = element_blank(), legend.position="none") +
geom_point(pch = 21, position = position_jitterdodge()) +
scale_fill_discrete(labels=c("None detected", "Viral", "Bacterial", "Both")) +
labs(fill = "Infection type")
assign(paste0("p", i), p)
}
p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + p10 + p11 + p12 + p13 + p14 + plot_layout(nrow = 2)
Here, we will compute the correlation between the bacterial host response score and the total bacterial pathogen abundance (MCAT + SPN + HFLU), and compare this to 1000 randomly samples selections of three bacterial species from the data.
abundant_bacteria = setdiff(which(apply(matr,2,max) >= 10),grep("virus",colnames(matr)))
cors = vector(length = 1000)
for (i in 1:1000)
{
randSetOfThree = sample(abundant_bacteria,3)
threeBacAbundance = apply(matr[,randSetOfThree],1,sum) # sum of their individual RPMs
#ntile(threeBacAbundance, 10)
cors[i] = cor(bacterialResponseScore,log(threeBacAbundance + 1,10))
}
hist(cors,xlim=c(-1,1),breaks=100)
abline(v = cor(bacterialResponseScore, log(bacPathAbundance + 1,10)))
Viral ROC
response = vector(length = nrow(metadata))
response[] = 0
response[which(metadata$high_pathogen == 1 | metadata$high_pathogen == 3)] = 1
prediction = viralResponseScore
bacPlot = plot.roc(roc(response,prediction))
#coords(bacPlot) #for a list of sensitivities and specificities
auc(response,prediction)
## [1] 0.80819
Bacterial ROC
response = vector(length = nrow(metadata))
response[] = 0
response[which(metadata$high_pathogen == 2 | metadata$high_pathogen == 3)] = 1
prediction = bacterialResponseScore
bacPlot = plot.roc(roc(response,prediction))
#coords(bacPlot) #for a list of sensitivities and specificities
auc(response,prediction)
## [1] 0.7902539